/* Data analysis of Baseline Data for Nepal CCF: Children */

local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

/* Build a single child-level dataset by loading each baseline section in turn
   and giving the raw question codes descriptive names as we go. */

/* Step 1: section 10B supplies the ID codes of the children under age 3,
   together with the child-level questions renamed below. */
use "`folder'/Data/Baseline_Raw/SEC10B", clear

/* Name and the three parts of the recorded birthday */
rename (q10_name q10_1007dd q10_1007mm q10_1007yy) ///
       (child_name child_bday_day child_bday_month child_bday_year)

/* Questions 1008-1010 */
rename (q10_1008 q10_1009 q10_1010) ///
       (child_complications child_length_breastfed child_occulated)

/* Questions 1011-1015: height/weight items and the reason-not-measured item */
rename (q10_1011 q10_1012 q10_1013 q10_1014 q10_1015) ///
       (child_height child_height_laying child_weight child_weight_alone child_noanthro_reasons)

/* The child ID code takes the section-11 name because the SEC11 merge
   further down uses q11_idc as one of its keys. */
rename q10_idc q11_idc

/* Checkpoint the working data in a temporary file */
tempfile baseline_raw_child
save "`baseline_raw_child'", replace

/* 207 that are not merging correctly with ASQ:
   183 are for kids who are less than 1 month old, i.e. no ASQ expected
   14 say that ASQ should have been completed, 4 say eligible woman not home, 1 says no more set
   21, 22, 3 (only one kid)
   95, 4, 4 (two kids, only one set completed)
   251, 33, 4 (two kids, only one set completed)
   261, 9, 7 (two kids, second is valid missing)
   273, 28, 11 (says 2 completed)
   333, 12, 1  (two kids, only one set completed)
   414, 12, 2 (only one kid)
   447, 25, 4  (two kids, only one set completed)
   510, 5, 2  (two kids, only one set completed)
   521, 13, 3  (two kids, only one set completed)
   528, 9, 14 (six sets, says 2 completed)
   579, 25, 7 (three sets, says 2 completed)
   584, 10, 10  (two kids, only one set completed)
   596, 4, 8 (only one kid)
   
   4 who seem to have ASQ data but no anthropometrics, but no reason why not   
   202, 12: ASQ is for IDCODE 6, but missing data for anthropometrics
   206, 1, 4: This is the one that is identical and dropped
   265, 25, 4: ASQ should have IDCODE == 8/9 (25 month old)
   506, 21, 2: IDCODE == 4/5 (1 month old, 30 month old) */
   
/* Attach the section 11 (ASQ) items, matching one-to-one on
   www / hh / idc / q11_idc. No keep() is given, so unmatched rows from both
   sides stay in the data; the _merge indicator is not created. */
merge 1:1 www hh idc q11_idc using "`folder'/Data/Baseline_Raw/SEC11", nogenerate

/* Turn every ASQ item into a 0/1 indicator: a stored 2 becomes 0, and the
   item gets the No/Yes value label. Module scores are built afterwards. */
label define asq_label 0 "No" 1 "Yes"

/* Highest item number in each module (a-e). Items are numbered from 01,
   with a leading zero for 1-9. */
local asq_last_a 32
local asq_last_b 33
local asq_last_c 29
local asq_last_d 32
local asq_last_e 34

foreach module in a b c d e {
    forvalues item = 1/`asq_last_`module'' {
        /* Two-digit suffix so that 1 -> 01, ..., 9 -> 09, 10 -> 10 */
        local suffix : display %02.0f `item'
        local asqvar q11_1101`module'`suffix'

        replace `asqvar' = 0 if `asqvar' == 2
        label values `asqvar' asq_label
    }
}

/* Composite score per ASQ module = row sum of that module's 0/1 items.
   The varlists are ranges (first-last), so they rely on the items of each
   module being stored contiguously in the dataset.

   The missing option matters: rowtotal() treats missing items as 0, so without
   it a child with no ASQ responses at all (e.g. the children that did not
   merge with the ASQ section) would get a score of 0 rather than missing.
   With the option, the score is missing only when every item in the module is
   missing; partially answered modules are still summed as before. */
egen asq_communication = rowtotal(q11_1101a01-q11_1101a32), missing
egen asq_grossmotor = rowtotal(q11_1101b01-q11_1101b33), missing
egen asq_finemotor = rowtotal(q11_1101c01-q11_1101c29), missing
egen asq_problemsolving = rowtotal(q11_1101d01-q11_1101d32), missing
egen asq_personalsocial = rowtotal(q11_1101e01-q11_1101e34), missing

label variable asq_communication "ASQ Composite Communication Score"
label variable asq_grossmotor "ASQ Composite Gross Motor Score"
label variable asq_finemotor "ASQ Composite Fine Motor Score"
label variable asq_problemsolving "ASQ Composite Problem Solving Score"
label variable asq_personalsocial "ASQ Composite Personal Social Score"

/* NOTE(review): a composite value of 7 is displayed as "Oops". This looks
   like a deliberate flag for a score that should not occur - confirm the
   intent before relying on or removing it. */
label define asq_comp_label 7 "Oops"

label values asq_communication asq_comp_label
label values asq_grossmotor asq_comp_label
label values asq_finemotor asq_comp_label
label values asq_problemsolving asq_comp_label
label values asq_personalsocial asq_comp_label

/* Grab the women's data:
   585 are not matched: these include 347 for women who are pregnant for the first time,
   and 238 who are pregnant with a kid older than 36 months (except 4 weird ones) */  
/* Many children can share one woman record (m:1 on www / hh / idc).
   Only matched observations are retained and no _merge variable is created. */
merge m:1 www hh idc using "`folder'/Data/baseline_raw_women", keep(match) nogenerate

/* Overwrite the temporary checkpoint with the merged data */
save "`baseline_raw_child'", replace

/* Get the identification of age etc from the roster */
/* The child ID code takes the section-3 name so it can serve as a merge key
   against the roster file built below. */
rename q11_idc q03_idc

/* Build a roster extract with a birth_order variable. The child data in memory
   are set aside with preserve and brought back by restore at the end. */
preserve
use "`folder'/Data/Baseline_Raw/SEC03", clear

/* Household "eligible child" type: the largest q03_302 among members with
   q03_304a <= 1 (q03_304a / q03_304b are later renamed to child age in years /
   months). NOTE(review): the meaning of the q03_302 codes is not visible in
   this file - confirm against the questionnaire. */
gen elig_type = q03_302 if q03_304a <= 1

/* bysort rather than by: the raw file is used straight after loading, and by
   alone stops with an error if the data are not already sorted on www hh. */
bysort www hh: egen elig_child = max(elig_type)

/* Fallback for households with no such member: use the largest q03_302 among
   members coded 3 or 4, spread to every row of the household. */
bysort www hh: egen newborn = max(q03_302) if elig_child == . & (q03_302 == 3 | q03_302 == 4)
bysort www hh: egen newborn_child = max(newborn)

replace elig_child = newborn_child if elig_child == .

drop elig_type newborn newborn_child

/* Flag roster members whose q03_302 equals the household's eligible type */
gen kid_type = elig_child == q03_302

/* Order members within household and flag from oldest to youngest (years, then
   months, descending). q03_idc is added as a final key so that members with
   identical ages always receive the same order from run to run; without it
   ties are broken arbitrarily. */
gsort www hh kid_type -q03_304a -q03_304b q03_idc

by www hh kid_type: gen birth_order = _n if kid_type == 1

/* Youngest age (q03_304a) among the members that received a birth order */
by www hh: egen minage = min(q03_304a) if birth_order ~= .

/* Blank the birth order where that minimum is 13 or more.
   NOTE(review): 13 is compared with q03_304a, which is later renamed to age
   in years - confirm the intended threshold and unit. */
replace birth_order = . if minage >= 13

drop elig_child kid_type minage

sort www hh q03_idc
tempfile SEC03
save "`SEC03'", replace
restore

/* Pull sex, age (years and months) and birth order from the roster extract.
   Only children found in both files are kept; no _merge variable is created. */
merge 1:1 www hh q03_idc using "`SEC03'", keep(match) nogenerate ///
    keepusing(q03_303 q03_304a q03_304b birth_order)

rename (q03_303 q03_304a q03_304b) (child_sex child_age_years child_age_months)

/* Age expressed as a single count of months */
generate child_tot_months = 12 * child_age_years + child_age_months

/* Overwrite the temporary checkpoint */
save "`baseline_raw_child'", replace

/* Reduce section 6B to one row per www / hh / q06_idc so it can be merged
   one-to-one onto the child data. The child data in memory are set aside with
   preserve and brought back by restore. */
preserve
use "`folder'/Data/Baseline_Raw/sec06B", clear

/* A stable sort keeps rows with the same key in their original file order, so
   the row retained for a duplicated key is always the first one in the raw
   file. A plain sort orders ties arbitrarily, which made the retained row
   (and everything derived from it) able to change between runs. */
sort www hh q06_idc, stable
by www hh q06_idc: keep if _n == 1

tempfile SEC06B_clean
save "`SEC06B_clean'", replace
restore

/* Add in the morbidity from section 6 */
/* The child ID code takes the section-6 name to act as the merge key */
rename q03_idc q06_idc

/* Keep every child, with or without a section 6B row; rows that exist only
   in section 6B are discarded. No _merge variable is created. */
merge 1:1 www hh q06_idc using "`SEC06B_clean'", keep(master match) nogenerate

/* Indicator: 1 when q06_602 holds any text, 0 when it is empty (which includes
   children that had no section 6B row). */
gen child_sick = (q06_602 != "")

rename (q06_603 q06_604 q06_605 q06_606) ///
       (child_illness_type child_illness_where child_illness_consult_whom child_illness_noconsult)

drop q06_602

/* Final child-level dataset */
save "`folder'/Data/baseline_raw_child", replace
